5.1. Spurious associations

Load the Waffle House data.

library(rethinking)
data(WaffleDivorce)
d <- WaffleDivorce

Unload rethinking and load brms and, while we're at it, the tidyverse.

rm(WaffleDivorce)
detach(package:rethinking, unload = T)
library(brms)
library(tidyverse)

I'm not going to show the output, but you might go ahead and investigate the data.

head(d)
glimpse(d)

Now we have our data, we can reproduce Figure 5.1. One convenient way to get the handful of Sate labels into the plot was with the geom_text_repel() function from the ggrepel package.

# install.packages("ggrepel", depencencies = T)
library(ggrepel)

set.seed(1042) #  This makes the geom_text_repel() part reproducible
d %>%
  ggplot(aes(x = WaffleHouses/Population, y = Divorce)) +
  theme_bw() +
  stat_smooth(method = "lm", fullrange = T, size = 1/2,
              color = "firebrick4", fill = "firebrick", alpha = 1/5) +
  geom_point(size = 1.5, color = "firebrick4", alpha = 1/2) +
  geom_text_repel(data = d %>% 
                    filter(Loc %in% c("ME", "OK", "AR", "AL",
                                      "GA", "SC", "NJ")),
                  aes(label = Loc), size = 3) +
  scale_x_continuous(limits = c(0, 55)) +
  coord_cartesian(xlim = 0:50, ylim = 5:15) +
  labs(x = "Waffle Houses per million", 
       y = "Divorce rate") +
  theme(panel.grid = element_blank())  

Here we standardize the predictor, MedianAgeMarriage, and fit the first univariable model.

d <-
  d %>%
  mutate(MedianAgeMarriage.s = (MedianAgeMarriage - mean(MedianAgeMarriage))/sd(MedianAgeMarriage))

b5.1 <- 
  brm(data = d, family = gaussian,
      Divorce ~ 1 + MedianAgeMarriage.s,
      prior = c(set_prior("normal(0, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.1)

Here it is, Figure 5.2.b.

# First we determine the range of MedianAgeMarriage.s values we'd like to feed into fitted()
nd <- tibble(MedianAgeMarriage.s = seq(from = -3, to = 3.5, 
                                       length.out = 30))

# Now we use fitted() to get the model-implied trajectories
fitd5.1 <- 
  fitted(b5.1, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd)

# The plot
ggplot(data = fitd5.1, 
       aes(x = MedianAgeMarriage.s, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_line(color = "firebrick4") +
  geom_point(data = d, 
             aes(x = MedianAgeMarriage.s, y = Divorce), 
             size = 2, color = "firebrick4") +
  labs(y = "Divorce") +
  coord_cartesian(xlim = range(d$MedianAgeMarriage.s), 
                  ylim = range(d$Divorce)) +
  theme(panel.grid = element_blank())                   

After standardizing Marriage, we're ready to fit our second univariable model.

d <-
  d %>%
  mutate(Marriage.s = (Marriage - mean(Marriage))/sd(Marriage))

b5.2 <- 
  brm(data = d, family = gaussian,
      Divorce ~ 1 + Marriage.s,
      prior = c(set_prior("normal(0, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.2)

And now we prep for and plot our version of Figure 5.2.a.

nd <- tibble(Marriage.s = seq(from = -2.5, to = 3.5, 
                              length.out = 30))

fitd5.2 <- 
  fitted(b5.2, newdata = nd) %>%
  as_tibble() %>%
  bind_cols(nd)

ggplot(data = fitd5.2, 
       aes(x = Marriage.s, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_line(color = "firebrick4") +
  geom_point(data = d, 
             aes(x = Marriage.s, y = Divorce), 
             size = 2, color = "firebrick4") +
  coord_cartesian(xlim = range(d$Marriage.s), 
                  ylim = range(d$Divorce)) +
  labs(y = "Divorce") +
  theme(panel.grid = element_blank())                   

5.1.2. Fitting the model.

Let's get both Marriage.s and MedianAgeMarriage.s in there with our very first multivariable model.

b5.3 <- 
  brm(data = d, family = gaussian,
      Divorce ~ 1 + Marriage.s + MedianAgeMarriage.s,
      prior = c(set_prior("normal(0, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.3)

The stanplot() function is an easy way to get a default coefficient plot. You just put the brmsfit object into the function.

stanplot(b5.3)

There are numerous ways to make a coefficient plot. Another is with the mcmc_intervals() function from the bayesplot package. A nice feature of the bayesplot package is its convenient way to alter the color scheme with the color_scheme_set() function. Here, for example, we'll make the theme red. But note how the mcmc_intervals() function requires you to work with the posterior_samples() instead of the brmsfit object.

# install.packages("bayesplot", dependencies = T)
library(bayesplot)

post <- posterior_samples(b5.3)

color_scheme_set("red")
mcmc_intervals(post[, 1:4], 
               prob = .5,
               point_est = "median") +
  labs(title = "My fancy coefficient plot") +
  theme(axis.text.y = element_text(hjust = 0),
        axis.line.x = element_line(size = 1/4),
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank())

Because bayesplot produces a ggplot2 object, the plot was adjustable with familiar ggplot2 syntax. For more ideas, check out this vignette.

5.1.3. Plotting multivariate posteriors.

5.1.3.1. Predictor residual plots.

To get ready to make our residual plots, we'll predict Marriage.s with MedianAgeMarriage.s.

b5.4 <- 
  brm(data = d, family = gaussian,
      Marriage.s ~ 1 + MedianAgeMarriage.s,
      prior = c(set_prior("normal(0, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.4)

With fitted(), we compute the expected values for each State.

fitd54 <- 
  fitted(b5.4) %>%
  as_tibble() %>%
  bind_cols(d %>% select(MedianAgeMarriage.s))

After a little data processing, we can make Figure 5.3.

df54 <- 
  d %>%
  select(MedianAgeMarriage.s, Marriage.s) %>%
  bind_cols(fitd54 %>% 
              select(Estimate))

ggplot(data = df54, 
       aes(x = MedianAgeMarriage.s, y = Marriage.s)) +
  theme_bw() +
  geom_point(data = d, 
             aes(x = MedianAgeMarriage.s, y = Marriage.s), 
             size = 2, shape = 1, color = "firebrick4") +
  geom_segment(aes(xend = MedianAgeMarriage.s, yend = Estimate), 
               size = 1/4) +
  geom_line(aes(MedianAgeMarriage.s, y = Estimate), 
            color = "firebrick4") +
  coord_cartesian(ylim = range(d$Marriage.s)) +
  theme(panel.grid = element_blank())     

We get the residuals with the well-named residuals() function. Much like with brms::fitted(), brms::residuals() returns a four-vector matrix with the number of rows equal to the number of observations in the original data (by default, anyway). The vectors have the familiar names: Estimate, Est.Error, Q2.5, and Q97.5. See the brms reference manual for details.

With our residuals in hand, we just need a little more data processing to make Figure 5.4.a.

res54 <- 
  residuals(b5.4) %>%
  # To use this in ggplot2, we need to make it a tibble or data frame
  as_tibble()

df54 <-
  df54 %>%
  mutate(res     = res54$Estimate,
         Divorce = d$Divorce)

ggplot(data = df54, 
       aes(x = res, y = Divorce)) +
  theme_bw() +
  stat_smooth(method = "lm", color = "firebrick4", fill = "firebrick4", 
              alpha = 1/5, size = 1/2) +
  geom_vline(xintercept = 0, linetype = 2, color = "grey50") +
  geom_point(size = 2, color = "firebrick4", alpha = 2/3) +
  annotate("text", x = -.05, y = 14.1, label = "slower       faster") +
  coord_cartesian(ylim = c(6, 14.1)) +
  labs(x = "Marriage rate residuals") +
  theme(panel.grid = element_blank())  

To get the MedianAgeMarriage.s residuals, we have to fit the corresponding model first.

b5.4.1 <- 
  brm(data = d, family = gaussian,
      MedianAgeMarriage.s ~ 1 + Marriage.s,
      prior = c(set_prior("normal(0, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

And now we'll get the new batch of residuals, do a little data processing, and make a plot corresponding to Figure 5.4.b.

df541 <- 
  residuals(b5.4.1) %>%
  as_tibble() %>%
  select(Estimate) %>%
  bind_cols(d %>% select(Divorce))

ggplot(data = df541, 
       aes(x = Estimate, y = Divorce)) +
  theme_bw() +
  stat_smooth(method = "lm", color = "firebrick4", fill = "firebrick4", 
              alpha = 1/5, size = 1/2) +
  geom_vline(xintercept = 0, linetype = 2, color = "grey50") +
  geom_point(size = 2, color = "firebrick4", alpha = 2/3) +
  coord_cartesian(ylim = c(6, 14.1)) +
  annotate("text", x = -.14, y = 14.1, label = "younger     older") +
  labs(x = "Age of marriage residuals") +
  theme(panel.grid = element_blank())  

5.1.3.2. Counterfactual plots.

Making Figure 5.5.a. requires a little more data wrangling than before.

nd <- 
  tibble(Marriage.s = seq(from = -3, to = 3, length.out = 30),
         MedianAgeMarriage.s = rep(mean(d$MedianAgeMarriage.s, 
                                        times = 30)))

pred53.a <- predict(b5.3, newdata = nd)
fitd53.a <- fitted(b5.3, newdata = nd)

# This isn't the most tidyverse-centric way of doing things, but it just seemed easier to rely on the bracket syntax for this one
tibble(Divorce = fitd53.a[, 1],
       fll     = fitd53.a[, 3],
       ful     = fitd53.a[, 4],
       pll     = pred53.a[, 3],
       pul     = pred53.a[, 4]) %>%
  bind_cols(nd) %>%  # Note our use of the pipe, here. This allowed us to feed the tibble directly into ggplot2 without having to save it as an object.

  ggplot(aes(x = Marriage.s, y = Divorce)) +
  theme_bw() +
  geom_ribbon(aes(ymin = pll, ymax = pul),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = fll, ymax = ful),
              fill = "firebrick", alpha = 1/5) +
  geom_line(color = "firebrick4") +
  coord_cartesian(ylim = c(6, 14)) +
  labs(subtitle = "Counterfactual plot for which\nMedianAgeMarriage.s = 0") +
  theme(panel.grid = element_blank())     

We follow the same process for Figure 5.5.b.

nd <- 
  tibble(MedianAgeMarriage.s = seq(from = -3, to = 3.5, length.out = 30),
         Marriage.s = rep(mean(d$Marriage.s), times = 30))

pred53.b <- predict(b5.3, newdata = nd)
fitd53.b <- fitted(b5.3, newdata = nd)

tibble(Divorce = fitd53.b[, 1],
       fll     = fitd53.b[, 3],
       ful     = fitd53.b[, 4],
       pll     = pred53.b[, 3],
       pul     = pred53.b[, 4]) %>%
  bind_cols(nd) %>%

  ggplot(aes(x = MedianAgeMarriage.s, y = Divorce)) +
  theme_bw() +
  geom_ribbon(aes(ymin = pll, ymax = pul),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = fll, ymax = ful),
              fill = "firebrick", alpha = 1/5) +
  geom_line(color = "firebrick4") +
  coord_cartesian(ylim = c(6, 14)) +
  labs(subtitle = "Counterfactual plot for which\nMarriage.s = 0") +
  theme(panel.grid = element_blank())     

5.1.3.3. Posterior prediction plots.

In this version of Figure 5.6.a., the thin lines are the 95% intervals and the thicker lines are +/- the posterior SD, both of which are returned when you use fitted().

fitted(b5.3) %>%
  as_tibble() %>%
  bind_cols(d %>% select(Divorce, Loc)) %>%

  ggplot(aes(x = Divorce, y = Estimate)) +
  theme_bw() +
  geom_abline(linetype = 2, color = "grey50", size = .5) +
  geom_point(size = 1.5, color = "firebrick4", alpha = 3/4) +
  geom_linerange(aes(ymin = `Q2.5`, ymax = `Q97.5`),
                 size = 1/4, color = "firebrick4") +
  geom_linerange(aes(ymin = Estimate - Est.Error, ymax = Estimate + Est.Error),
                 size = 1/2, color = "firebrick4") +
  geom_text(data = . %>% filter(Loc %in% c("ID", "UT")),
            aes(label = Loc), 
            hjust = 0, nudge_x = - 0.65) +
  labs(x = "Observed divorce", y = "Predicted divorce") +
  theme(panel.grid = element_blank())

The legwork for and reproduction of Figure 5.6.b.

# This is for the average prediction error mean, +/- SD, and 95% intervals
res53 <- 
  residuals(b5.3) %>%
  as_tibble() %>%
  bind_cols(d %>% select(Loc))

# This is the 95% prediction interval
pred53 <- 
 predict(b5.3) %>%
  as_tibble() %>%
  transmute(`Q2.5` = d$Divorce - `Q2.5`,
            `Q97.5` = d$Divorce - `Q97.5`) %>%
  bind_cols(d %>% select(Loc))

# The plot
ggplot(data = res53, 
       aes(x = reorder(Loc, Estimate), y = Estimate)) +
  theme_bw() +
  geom_hline(yintercept = 0, size = 1/2, color = "grey85") +
  geom_pointrange(aes(ymin = `Q2.5`, ymax = `Q97.5`),
                  size = 2/5, shape = 20, color = "firebrick4") + 
  geom_segment(aes(y = Estimate - Est.Error, 
                   yend = Estimate + Est.Error,
                   x = Loc, xend = Loc),
               size = 1, color = "firebrick4") +
  geom_segment(data = pred53, 
               aes(y = `Q2.5`, yend = `Q97.5`,
                   x = Loc, xend = Loc),
               size = 3, color = "firebrick4", alpha = 1/10) +
  labs(x = NULL, y = NULL) +
  coord_flip(ylim = c(-6, 5)) +
  theme(panel.grid = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_text(hjust = 0))

Compared to the ones above, Figure 5.6.c. is pretty simple.

res53 %>%
  mutate(Wpc = d$WaffleHouses/d$Population) %>%

  ggplot(aes(x = Wpc, y = Estimate)) +
  theme_bw() +
  geom_point(size = 1.5, color = "firebrick4", alpha = 1/2) +
  stat_smooth(method = "lm", color = "firebrick4", size = 1/2, 
              fill = "firebrick", alpha = 1/5) + 
  geom_text_repel(data = . %>% 
                    filter(Loc %in% c("ME", "AR", "MS", "AL", 
                                      "GA", "SC", "ID")),
                  aes(label = Loc)) +
  theme(panel.grid = element_blank())
rm(d, b5.1, nd, fitd5.1, b5.2, fitd5.2, b5.3, b5.4, fitd54, df54, res54, b5.4.1, df541, pred53.a, fitd53.a, pred53.b, fitd53.b, res53, pred53)

5.2. Masked relationship

Let's load those tasty milk data.

library(rethinking)
data(milk)
d <- milk

Unload rethinking and load brms.

rm(milk)
detach(package:rethinking, unload = T)
library(brms)

You might inspect the data like this.

head(d)
glimpse(d)

McElreath has us start of with a simple univaraible milk model.

b5.5 <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 1 + neocortex.perc,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 1)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4,
      control = list(adapt_delta = 0.95))

Stan and brms did just fine with the missing data. However, the warning messages suggested we raise adapt_delta above the default 0.8. Setting it to 0.95 did the trick. Similar to the rethinking example in the text, brms warned that "Rows containing NAs were excluded from the model." This isn't necessarily a problem; the model fit just fine. But do see chapter 14 to learn how to do better.

Here's how to explicitly drop the cases with missing values on the predictor.

d %>%
  select(neocortex.perc) %>%
  head()

dcc <- 
  d %>%
  filter(complete.cases(.))              

b5.5 <- 
  brm(data = dcc, family = gaussian,
      kcal.per.g ~ 1 + neocortex.perc,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 1)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4,
      control = list(adapt_delta = 0.9))

print(b5.5, digits = 3)

To get the brms answer to what McElreath did with coef(), we'll use the fixef() function.

fixef(b5.5)[2]*(76 - 55)

Yes, indeed, "that's less than 0.1 kilocalories" (p. 137).

Here's Figure 5.7., top left.

nd <- tibble(neocortex.perc = 54:80)

dffitted.a <- 
  fitted(b5.5, 
         newdata = nd,
         probs = c(.025, .975, .25, .75)) %>%
  as_tibble() %>%
  bind_cols(nd)

ggplot(data = dffitted.a, 
       aes(x = neocortex.perc, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = Q25, ymax = Q75),
              fill = "firebrick4", alpha = 1/5) +
  geom_line(color = "firebrick4", size = 1/2) +
  geom_point(data = dcc, 
             aes(x = neocortex.perc, y = kcal.per.g),
             size = 2, color = "firebrick4") +
  coord_cartesian(xlim = range(dcc$neocortex.perc), 
                  ylim = range(dcc$kcal.per.g)) +
  labs(y = "kcal.per.g") +
  theme(panel.grid = element_blank())

Now we use log.mass as the new sole predictor.

dcc <-
  dcc %>%
  mutate(log.mass = log(mass))

b5.6 <- 
  brm(data = dcc, family = gaussian,
      kcal.per.g ~ 1 + log.mass,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 1)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4,
      control = list(adapt_delta = 0.9))

print(b5.6, digits = 3)

Figure 5.7., top right.

# Here we get the range we'll want for nd, below
dcc %>%
  select(log.mass) %>%
  range()

nd <- 
  tibble(log.mass = seq(from = -2.5, to = 5,
                        length.out = 30))

dffitted.b <- 
  b5.6 %>%
  fitted(newdata = nd,
         probs = c(.025, .975, .25, .75)) %>%
  as_tibble() %>%
  mutate(log.mass = seq(from = -2.5, to = 5, 
                            length.out = 30))

ggplot(data = dffitted.b, 
       aes(x = log.mass, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = Q25, ymax = Q75),
              fill = "firebrick4", alpha = 1/5) +
  geom_line(color = "firebrick4", size = 1/2) +
  geom_point(data = dcc, aes(x = log.mass, y = kcal.per.g),
             size = 2, color = "firebrick4") +
  coord_cartesian(xlim = range(dcc$log.mass), 
                  ylim = range(dcc$kcal.per.g)) +
  labs(y = "kcal.per.g") +
  theme(panel.grid = element_blank())

Finally, we're ready to fit the "joint model" including both predictors. Note, to converge properly, the HMC chains required a longer warmup period and adapt_delta required an even higher setting. Life would be better if we ditched that uniform prior on sigma.

b5.7 <- 
  brm(data = dcc, family = gaussian,
      kcal.per.g ~ 1 + neocortex.perc + log.mass,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 1)", class = "sigma")),
      chains = 4, iter = 4000, warmup = 2000, cores = 4,
      control = list(adapt_delta = 0.99))

print(b5.7, digits = 3)

Preping for and reproducing Figure 5.7., bottom left.

nd <- 
  tibble(neocortex.perc = 54:80 %>% as.double(),
         log.mass = mean(dcc$log.mass))

dffitted.c <- 
  b5.7 %>%
  fitted(newdata = nd, 
         probs = c(.025, .975, .25, .75)) %>%
  as_tibble() %>%
  bind_cols(nd)

ggplot(data = dffitted.c, 
       aes(x = neocortex.perc, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = Q25, ymax = Q75),
              fill = "firebrick4", alpha = 1/5) +
  geom_line(color = "firebrick4", size = 1/2) +
  geom_point(data = dcc, aes(x = neocortex.perc, y = kcal.per.g),
             size = 2, color = "firebrick4") +
  coord_cartesian(xlim = range(dcc$neocortex.perc), 
                  ylim = range(dcc$kcal.per.g)) +
  labs(y = "kcal.per.g") +
  theme(panel.grid = element_blank())

Prepping for and reproducing Figure 5.7., bottom right.

nd <- 
  tibble(log.mass = seq(from = -2.5, to = 5,
                        length.out = 30),
         neocortex.perc = mean(dcc$neocortex.perc))

dffitted.d <- 
  b5.7 %>%
  fitted(newdata = nd,
         probs = c(.025, .975, .25, .75)) %>%
  as_tibble() %>%
  bind_cols(nd)

ggplot(data = dffitted.d, 
       aes(x = log.mass, y = Estimate)) +
  theme_bw() +
  geom_ribbon(aes(ymin = `Q2.5`, ymax = `Q97.5`),
              fill = "firebrick", alpha = 1/5) +
  geom_ribbon(aes(ymin = Q25, ymax = Q75),
              fill = "firebrick4", alpha = 1/5) +
  geom_line(color = "firebrick4", size = 1/2) +
  geom_point(data = dcc, aes(x = log.mass, y = kcal.per.g),
             size = 2, color = "firebrick4") +
  coord_cartesian(xlim = range(dcc$log.mass), 
                  ylim = range(dcc$kcal.per.g)) +
  labs(y = "kcal.per.g") +
  theme(panel.grid = element_blank())
rm(d, b5.5, dcc, nd, dffitted.a, b5.6, dffitted.b, b5.7, dffitted.c, dffitted.d)

5.3. When adding variables hurts

5.3.1. Multicollinear legs.

Let's simulate some leg data.

N <- 100
set.seed(851)

d <- 
  tibble(height    = rnorm(N, mean = 10, sd = 2),
         leg_prop  = runif(N, min = 0.4, max = 0.5),
         leg_left  = leg_prop*height + rnorm(N, mean = 0, sd = 0.02),
         leg_right = leg_prop*height + rnorm(N, mean = 0, sd = 0.02))

leg_left and leg_right are highly correlated.

d %>%
  select(leg_left:leg_right) %>%
  cor() %>%
  round(digits = 4)

Here's our attempt to predict height with both legs.

b5.8 <- 
  brm(data = d, family = gaussian,
      height ~ 1 + leg_left + leg_right,
      prior = c(set_prior("normal(10, 100)", class = "Intercept"),
                set_prior("normal(2, 10)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.8)

Instead of a coefficient plot like McElreath did by nesting precis() in plot(), why not make stacked density plots?

color_scheme_set("red")

stanplot(b5.8, 
         type = "areas", 
         prob = .5, 
         point_est = "median") +
  labs(title = "The coefficient plot for the two-leg model",
       subtitle = "Holy smokes; look at the widths of those betas!") +
  theme_bw() +
  theme(text = element_text(size = 14),
        axis.ticks.y = element_blank(),
        axis.text.y = element_text(hjust = 0))

Note. You can use the brms::stanplot() function without explicitly loading the bayesplot package. But loading bayesplot allows you to set the color scheme with color_scheme_set().

This is perhaps the simplest way to plot the bivariate posterior of our two predictor coefficients, Figure 5.8.a.

pairs(b5.8, pars = parnames(b5.8)[2:3])

If you'd like a nicer and more focused attempt, you might have to revert to the posterior_samples() function and a little ggplot2 code.

posterior_samples(b5.8) %>%

  ggplot(aes(x = b_leg_left, y = b_leg_right)) +
  theme_bw() +
  geom_point(color = "firebrick", alpha = 1/10, size = 1/3) +
  theme(panel.grid = element_blank())

While we're at it, you can make a similar plot with the mcmc_scatter() function.

posterior_samples(b5.8) %>%

  mcmc_scatter(pars = c("b_leg_left", "b_leg_right"),
               size = 1/3, 
               alpha = 1/10) +
  theme_bw() +
  theme(panel.grid = element_blank())

Here's the posterior of the sum of the two regression coefficients, Figure 5.8.b.

post <- posterior_samples(b5.8)

  ggplot(data = post,
         aes(x = b_leg_left + b_leg_right)) +
  theme_bw() +
  geom_density(fill = "firebrick4", size = 0) +
  geom_vline(xintercept = median(post$b_leg_left + post$b_leg_right),
             color = "white", linetype = 2) +
  scale_y_continuous(NULL, breaks = NULL) +
  labs(subtitle = "Posterior median = dashed line") +
  theme(panel.grid = element_blank())

Now we fit the model after ditching one of the leg lengths.

b5.9 <- 
  brm(data = d, family = gaussian,
      height ~ 1 + leg_left,
      prior = c(set_prior("normal(10, 100)", class = "Intercept"),
                set_prior("normal(2, 10)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.9)

Here's the density in Figure 5.8.b.

post <- posterior_samples(b5.9)

  ggplot(data = post,
         aes(x = b_leg_left)) +
  theme_bw() +
  geom_density(fill = "firebrick4", size = 0) +
  geom_vline(xintercept = median(post$b_leg_left),
             color = "white", linetype = 3) +
  scale_y_continuous(NULL, breaks = NULL) +
  labs(subtitle = "Posterior median = dotted line") +
  theme(panel.grid = element_blank())

5.3.2. Multicollinear milk.

library(rethinking)
data(milk)
d <- milk

rm(milk)
detach(package:rethinking, unload = TRUE)
library(brms)

Let's fit the two models in the text.

b5.10 <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 1 + perc.fat,
      prior = c(set_prior("normal(.6, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

b5.11 <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 1 + perc.lactose,
      prior = c(set_prior("normal(.6, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.10, digits = 3)
print(b5.11, digits = 3)

As McElreath wrote, "watch what happens when we place both predictor varaibles in the same regression model" (p. 146)

b5.12 <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 1 + perc.fat + perc.lactose,
      prior = c(set_prior("normal(.6, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.12, digits = 3)

You can make custom pairs plots with GGalley, which will also compute the point estimates for the bivariate correlations. Here's a default plot.

#install.packages("GGally", dependencies = T)
library(GGally)

ggpairs(data = d, columns = c(3:4, 6)) + 
  theme_classic()

But you can customize these, too. E.g.,

my_diag <- function(data, mapping, ...){
  pd <- ggplot(data = data, mapping = mapping) + 
    geom_density(fill = "firebrick4", size = 0)
  pd
}

my_lower <- function(data, mapping, ...){
  pl <- ggplot(data = data, mapping = mapping) + 
    geom_smooth(method = "lm", color = "firebrick4", size = 1/3, se = F) +
    geom_point(color = "firebrick", alpha = .8, size = 1/4)
  pl
  }

# Then plug those custom functions into ggpairs
ggpairs(data = d, columns = c(3:4, 6),
        mapping = ggplot2::aes(color = "black"),
        diag = list(continuous = my_diag),
        lower = list(continuous = my_lower)) + 
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_rect(fill = "white"))

5.3.3. Post-treatment bias

Once again, we'll simulate our data.

N <- 100

set.seed(17)

d <- 
  tibble(
    h0        = rnorm(N, 10, 2), 
    treatment = rep(0:1, each = N/2),
    fungus    = rbinom(N, size = 1, prob = .5 - treatment*0.4),
    h1        = h0 + rnorm(N, mean = 5 - 3*fungus)
  )

We'll use head() to peek at the data, which we put in a tibble rather than a data frame.

d %>% head()

These data + the model were rough on Stan, at first, which spat out warnings about divergent transitions. The model ran fine after setting warmup = 1000 and adapt_delta = .99.

b5.13 <- 
  brm(data = d, family = gaussian,
      h1 ~ 1 + h0 + treatment + fungus,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 10)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4,
      control = list(adapt_delta = 0.99))

print(b5.13)

Fitting the model after excluding fungus, our post-treatment variable.

b5.14 <- 
  brm(data = d, family = gaussian,
      h1 ~ 1 + h0 + treatment,
      prior = c(set_prior("normal(0, 100)", class = "Intercept"),
                set_prior("normal(0, 10)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 1000, cores = 4)

print(b5.14)
rm(d, N, b5.8, post, b5.9, b5.10, b5.11, b5.12, 
   my_diag, my_lower, b5.13, b5.14)

5.4. Categorical varaibles

5.4.1. Binary categories.

Reload the Howell1 data.

library(rethinking)
data(Howell1)
d <- Howell1

Unload rethinking and load brms.

rm(Howell1)
detach(package:rethinking, unload = T)
library(brms)

Just in case you forgot what these data were like:

d %>%
  glimpse()

Let's fit the first height model.

Note. The uniform prior McElreath used in the text in conjunction with his map() function seemed to cause problems for the HMC chains, here. After experimenting with start values, increasing warmup, and increasing adapt_delta, switching out the prior did the trick. Anticipating chapter 8, I recommend you use a weakly-regularizing half Cauchy for $\sigma$.

b5.15 <- 
  brm(data = d, family = gaussian,
      height ~ 1 + male,
      prior = c(set_prior("normal(178, 100)", class = "Intercept"),
                set_prior("normal(0, 10)", class = "b"),
                set_prior("cauchy(0, 2)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.15)

Our samples from the posterior are already in the HMC iterations. All we need to do is put them in a data frame and then put them to work.

post <- posterior_samples(b5.15)

quantile(post[ , 1] + post[ , 2], c(.025, .975))

A more tidyverse-centric way to do this might be something like:

post <-
  b5.15 %>%
  posterior_samples()

post %>%
  mutate(mu_male = b_Intercept + b_male) %>%
  summarise(LL = quantile(mu_male, .025) %>% round(digits = 2),
            UL = quantile(mu_male, .975) %>% round(digits = 2))
Overthinking: Re-parameterizing the model.

Everyone has their own idiosyncratic way of coding. One of my quirks is that I always explicitly specify a model’s intercept following the form y ~ 1 + x, where y is the criterion, x stands for the predictors, and 1 is the intercept. You don’t have to do this, of course. You could just code y ~ x to get the same results. brm() assumes you want that intercept. One of the reasons I like the verbose version is it reminds me to think about the intercept and to include it in my priors. Another nice feature is that is helps me make sense of the code for this model: height ~ 0 + male + female. When we replace … ~ 1 + … with … ~ 0 + …, we tell brm() to remove the intercept. Removing the intercept allows us to include ALL levels of a given categorical variable in our model. In this case, we’ve expressed sex as two dummies, female and male. Taking out the intercept lets us put both dummies into the formula.

d <-
  d %>%
  mutate(female = 1 - male)

b5.15b <- 
  brm(data = d, family = gaussian,
      height ~ 0 + male + female,
      prior = c(set_prior("normal(178, 100)", class = "b"),
                set_prior("cauchy(0, 2)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.15b)

Oh man, that sweet half Cauchy just keeps getting the job done.

5.4.2. Many categories

We just can't keep ourselves from picking that milk back up again and again.

library(rethinking)
data(milk)
d <- milk

Unload rethinking and load brms.

rm(milk)
detach(package:rethinking, unload = TRUE)
library(brms)

With the tidyverse, we can peek at clade with distinct() in the place of base R unique().

d %>%
  distinct(clade)

As clade has 4 categories, let's convert these to 4 dummy variables.

d <- 
  d %>%
  mutate(clade.NWM = ifelse(clade == "New World Monkey", 1, 0),
         clade.OWM = ifelse(clade == "Old World Monkey", 1, 0),
         clade.S   = ifelse(clade == "Strepsirrhine", 1, 0),
         clade.Ape = ifelse(clade == "Ape", 1, 0))

Now we'll fit the model with three of the four dummies.

b5.16 <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 1 + clade.NWM + clade.OWM + clade.S,
      prior = c(set_prior("normal(.6, 10)", class = "Intercept"),
                set_prior("normal(0, 1)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4,
      control = list(adapt_delta = 0.8))

print(b5.16)

Here we grab the chains, our draws from the posterior.

post <- 
  b5.16 %>%
  posterior_samples()

head(post)

You might compute averages for each category and summarizing the results with the transpose of base R's apply() function, rounding to two digits of precision.

post$mu.Ape <- post$b_Intercept
post$mu.NWM <- post$b_Intercept + post$b_clade.NWM
post$mu.OWM <- post$b_Intercept + post$b_clade.OWM
post$mu.S   <- post$b_Intercept + post$b_clade.S

round(t(apply(post[ ,7:10], 2, quantile, c(.5, .025, .975))), digits = 2)

Here's a more tidyverse sort of way to get the same thing.

post %>%
  mutate(mu.Ape = b_Intercept,
         mu.NWM = b_Intercept + b_clade.NWM,
         mu.OWM = b_Intercept + b_clade.OWM,
         mu.S   = b_Intercept + b_clade.S) %>%
  select(mu.Ape:mu.S) %>% 
  gather(parameter) %>%
  group_by(parameter) %>%
  summarise(median = median(value) %>% round(digits = 2),
            LL = quantile(value, probs = .025) %>% round(digits = 2),
            UL = quantile(value, probs = .975) %>% round(digits = 2))

Getting summary statistics for the difference between NWM and OWM.

# Base R
round(quantile(post$mu.NWM - post$mu.OWM, probs = c(.025, .5, .975)), digits = 2)

# tidyverse
post %>%
  mutate(dif = mu.NWM - mu.OWM) %>%
  summarise(median = median(dif) %>% round(digits = 2),
            LL = quantile(dif, probs = .025) %>% round(digits = 2),
            UL = quantile(dif, probs = .975) %>% round(digits = 2))

5.4.3. Adding regular predictor variables.

Using the code below, there's no need to transform d$clade into d$clade_id. The advantage of this approach is the indices in the model summary are more descriptive than a[1] through a[4].

b5.16_alt <- 
  brm(data = d, family = gaussian,
      kcal.per.g ~ 0 + clade,
      prior = c(set_prior("normal(.6, 10)", class = "b"),
                set_prior("uniform(0, 10)", class = "sigma")),
      chains = 4, iter = 2000, warmup = 500, cores = 4)

print(b5.16_alt)
rm(d, b5.15, post, b5.15b, b5.16, b5.16_alt)

5.5. ~~Ordinary least squares and lm()~~

Since this section centers on the frequentist lm() function, I'm going to largely ignore it. A couple things, though. You'll note how the brms package uses the lm()-like design formula syntax. Although not as pedagogical as the more formal rethinking syntax, it has the advantage of cohering with the popular lme4 syntax for multilevel models.

Also, on page 161, McElreath clarifies that one cannot use the I() syntax with his rethinking package. Not so with brms. The I() syntax works just fine with brms::brm().

Note. The analyses in this document were done with:

Reference

McElreath, R. (2016). Statistical rethinking: A Bayesian course with examples in R and Stan. Chapman & Hall/CRC Press.



joepowers16/rethinking documentation built on June 2, 2019, 6:52 p.m.